#!/bin/bash
#
# Runs scripts/search.sh for a raw/target dataset pair, then runs
# scripts/merged_jsonl.sh on the resulting "resampled" directory to produce a
# single output file.
#
# Usage: <this script> RAW_DATASETS TARGET_DATASETS
#   RAW_DATASETS     path forwarded to scripts/search.sh as its 2nd argument;
#                    its basename (without .jsonl) names the output file.
#   TARGET_DATASETS  path forwarded to scripts/search.sh as its 3rd argument.
#
# Exits non-zero as soon as any step fails; "FINISHED" is printed only when
# both steps succeeded.
set -euo pipefail

if [[ $# -lt 2 ]]; then
  printf 'Usage: %s RAW_DATASETS TARGET_DATASETS\n' "${0##*/}" >&2
  exit 2
fi

readonly SCRIPT_PATH=/share/projset/dsir7
readonly INDEX_DIR="${SCRIPT_PATH}/indexes/qwen_tokenizer_3_300000"
readonly CONFIG_FILE="${INDEX_DIR}/feat_config.json"
readonly NUM_TO_SAMPLE=1000
readonly MIN_EXAMPLE_LENGTH=0

readonly RAW_DATASETS=$1
readonly TARGET_DATASETS=$2

name=$(basename -- "$RAW_DATASETS" .jsonl)
# Fixed: this was previously derived from RAW_DATASETS, which made the merged
# directory always "<raw>_<raw>_...". The target dataset's basename is used now.
# NOTE(review): confirm this matches the directory scripts/search.sh writes.
tname=$(basename -- "$TARGET_DATASETS" .jsonl)

# NOTE(review): the "_0.0_0" suffix is hard-coded here; presumably it mirrors
# the naming used by scripts/search.sh -- verify if its parameters change.
readonly MERGED_DIR="${INDEX_DIR}/${name}_${tname}_0.0_0/resampled"
readonly OUTFILE="${INDEX_DIR}/${name}-llmevalv1.jsonl"

# The helper scripts are referenced relative to SCRIPT_PATH, so a failed cd
# must abort instead of running them from the wrong directory.
cd -- "$SCRIPT_PATH" || {
  printf 'error: cannot cd to %s\n' "$SCRIPT_PATH" >&2
  exit 1
}

bash scripts/search.sh \
  "$CONFIG_FILE" "$RAW_DATASETS" "$TARGET_DATASETS" \
  "$NUM_TO_SAMPLE" "$MIN_EXAMPLE_LENGTH"
bash scripts/merged_jsonl.sh "$MERGED_DIR" "$OUTFILE"
echo "FINISHED"